1 Introduction

In this notebook, we are going to compute specific chromatin accesibility patterns for each of the cell types defined at level 1. To do this, we will use an differential accessible test (DA) followed by obtaining the chromatin signature per each group of cells. We used Signac and ChromVar tools to achieve the above goals.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(Signac)
library(reshape)
library(ggplot2)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
library(chromVAR)
library(motifmatchr)
library(ggpubr)
library(dplyr)
library(tidyr)

set.seed(1234)

2.2 Parameters

path_to_ATAC <- here::here("scATAC-seq/results/R_objects/8.2.tonsil_peakcalling_annotation_level1_integrated.rds")
path_to_save <- here::here("scATAC-seq/results/R_objects/8.3.tonsil_peakcalling_annotation_level1_signature.rds")
path_to_save_df <- here::here("scATAC-seq/results/files/2.da_peaks_level1.tsv")

3 Load data

seurat <- readRDS(path_to_ATAC)
seurat
## An object of class Seurat 
## 270784 features across 101076 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 270784 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
table(seurat$annotation_level_1)
## 
##    NBC_MBC       GCBC         PC      CD4_T  Cytotoxic    myeloid        FDC        PDC epithelial 
##      41085      27497       2548      25011       3635        876        113        310          1
# Remove epithelial cell
toRemove = row.names(seurat@meta.data[which(seurat@meta.data$annotation_level_1 == "epithelial"),])
seurat <- seurat[,!colnames(seurat) %in% toRemove]
table(seurat$annotation_level_1)
## 
##    NBC_MBC       GCBC         PC      CD4_T  Cytotoxic    myeloid        FDC        PDC epithelial 
##      41085      27497       2548      25011       3635        876        113        310          0
DimPlot(
  seurat, reduction = "umap",
  group.by = "annotation_level_1",
  cols = c("#a6cee3", "#1f78b4","#b2df8a", 
             "#33a02c", "#fb9a99","#e31a1c", 
             "#fdbf6f", "#ff7f00","#cab2d6",
             "#6a3d9a"),
  pt.size = 0.1
)

CoveragePlot(
  object = seurat,
  region = "chr19-42282829-42284493",
  extend.upstream = 1000,
  extend.downstream = 1000
)

4 Detection of features to include in each module.

To do that, we are going to compute all differentially accessible peaks between all the clusters defined at level1. We used a logistic regression, as suggested by Ntranos et.al 2018, adding the total number of fragments per peak as a latent variable to mitigate the effect of sequencing depth.

5 Get differentially accessible (DA) peaks

#differential.peaks <- FindAllMarkers(
 # object = seurat,
#  test.use = 'LR',
#  min.pct = 0.2,
#  only.pos = TRUE,
 # latent.vars = 'nCount_peaks_macs'
#)

#write.table(differential.peaks, path_to_save_df, quote = F)
differential.peaks <- read.table(path_to_save_df)

6 Adding Chromatin signature using the top 2000 differentially accessible peaks

Initially, we decided to filter out DA peaks that present an avg_log2FC less than 0.5. Then, the top 2000 DA peaks per group, defined at level 1, were considered the features to compute chromVAR deviations and thus add the chromatin signature in each cell-type.

chromatin_module <- function(seurat, top_peaks){
  DA_filt <- differential.peaks[differential.peaks$avg_log2FC > 0.5, ]
  DA_filt_2000 <- DA_filt %>% group_by(cluster) %>% top_n(n = top_peaks) #, wt = avg_log2FC)
  features <- as.list(spread(DA_filt_2000, cluster, gene)[6:13])
  features_na_omit <- lapply(features, function(x) x[!is.na(x)])

  seurat <- AddChromatinModule(seurat, 
                             features = features_na_omit, 
                             genome = BSgenome.Hsapiens.UCSC.hg38)
  return(seurat)}
seurat <- chromatin_module(seurat, 2000)
# Replace NA values per 0 to avoid problems in the representation
seurat@meta.data[c("NBC.MBC", "GCBC", "PC", "CD4.T","Cytotoxic",  
                   "myeloid",  "FDC", "PDC")][is.na(seurat@meta.data[c("NBC.MBC","GCBC", "PC","CD4.T",
                   "Cytotoxic",  "myeloid",  "FDC", "PDC")])] <- 0

7 Signature visualization

To visualize this signature, we plotted the scores using FeaturePlot, with a maximum cutoff of ‘q95’.

FeaturePlot(seurat, "NBC.MBC", max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "GCBC", max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "PC", max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "CD4.T", max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "Cytotoxic", max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, c("myeloid"), max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, c("FDC"), max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, c("PDC"), max.cutoff = "q95") + 
scale_color_viridis_c(option = "magma")

saveRDS(seurat,path_to_save)

8 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidyr_1.1.2                       dplyr_1.0.2                       ggpubr_0.4.0                      motifmatchr_1.10.0                chromVAR_1.10.0                   patchwork_1.1.0                   BSgenome.Hsapiens.UCSC.hg38_1.4.3 BSgenome_1.56.0                   rtracklayer_1.48.0                Biostrings_2.56.0                 XVector_0.28.0                    GenomicRanges_1.40.0              GenomeInfoDb_1.24.0               IRanges_2.22.1                    S4Vectors_0.26.0                  BiocGenerics_0.34.0               TFBSTools_1.26.0                  JASPAR2020_0.99.10                ggplot2_3.3.2                     reshape_0.8.8                     Signac_1.1.0.9000                 Seurat_3.9.9.9010                 BiocStyle_2.16.1                 
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.1              SnowballC_0.7.0             GGally_2.0.0                R.methodsS3_1.8.1           nabor_0.5.0                 bit64_4.0.5                 knitr_1.30                  irlba_2.3.3                 DelayedArray_0.14.0         R.utils_2.10.1              data.table_1.13.2           rpart_4.1-15                KEGGREST_1.28.0             RCurl_1.98-1.2              AnnotationFilter_1.12.0     generics_0.1.0              GenomicFeatures_1.40.1      cowplot_1.1.0               RSQLite_2.2.1               RANN_2.6.1                  future_1.20.1               bit_4.0.4                   spatstat.data_2.1-0         xml2_1.3.2                  httpuv_1.5.4                SummarizedExperiment_1.18.1 assertthat_0.2.1            DirichletMultinomial_1.30.0 xfun_0.18                   hms_0.5.3                   evaluate_0.14               promises_1.1.1              progress_1.2.2              readxl_1.3.1                caTools_1.18.0              dbplyr_1.4.4                igraph_1.2.6                DBI_1.1.0                   htmlwidgets_1.5.2           purrr_0.3.4                 ellipsis_0.3.1              backports_1.2.0            
##  [43] bookdown_0.21               annotate_1.66.0             biomaRt_2.44.4              deldir_0.2-3                vctrs_0.3.4                 Biobase_2.48.0              here_1.0.1                  ensembldb_2.12.1            ROCR_1.0-11                 abind_1.4-5                 withr_2.3.0                 ggforce_0.3.2               checkmate_2.0.0             sctransform_0.3.1           GenomicAlignments_1.24.0    prettyunits_1.1.1           goftest_1.2-2               cluster_2.1.0               lazyeval_0.2.2              seqLogo_1.54.3              crayon_1.3.4                labeling_0.4.2              pkgconfig_2.0.3             tweenr_1.0.1                nlme_3.1-150                ProtGenerics_1.20.0         nnet_7.3-14                 rlang_0.4.11                globals_0.13.1              lifecycle_0.2.0             miniUI_0.1.1.1              BiocFileCache_1.12.1        rsvd_1.0.3                  dichromat_2.0-0             rprojroot_2.0.2             cellranger_1.1.0            polyclip_1.10-0             matrixStats_0.57.0          lmtest_0.9-38               graph_1.66.0                Matrix_1.3-4                ggseqlogo_0.1              
##  [85] carData_3.0-4               zoo_1.8-8                   base64enc_0.1-3             ggridges_0.5.2              png_0.1-7                   viridisLite_0.3.0           bitops_1.0-6                R.oo_1.24.0                 KernSmooth_2.23-17          blob_1.2.1                  stringr_1.4.0               parallelly_1.21.0           rstatix_0.6.0               readr_1.4.0                 jpeg_0.1-8.1                ggsignif_0.6.0              CNEr_1.24.0                 scales_1.1.1                memoise_1.1.0               magrittr_1.5                plyr_1.8.6                  ica_1.0-2                   zlibbioc_1.34.0             compiler_4.0.3              RColorBrewer_1.1-2          fitdistrplus_1.1-1          Rsamtools_2.4.0             listenv_0.8.0               pbapply_1.4-3               htmlTable_2.1.0             Formula_1.2-4               MASS_7.3-53                 mgcv_1.8-33                 tidyselect_1.1.0            forcats_0.5.0               stringi_1.5.3               yaml_2.2.1                  askpass_1.1                 latticeExtra_0.6-29         ggrepel_0.8.2               grid_4.0.3                  VariantAnnotation_1.34.0   
## [127] fastmatch_1.1-0             tools_4.0.3                 rio_0.5.16                  future.apply_1.6.0          rstudioapi_0.12             TFMPvalue_0.0.8             foreign_0.8-80              lsa_0.73.2                  gridExtra_2.3               farver_2.0.3                Rtsne_0.15                  digest_0.6.27               BiocManager_1.30.10         shiny_1.5.0                 pracma_2.2.9                Rcpp_1.0.5                  car_3.0-10                  broom_0.7.2                 later_1.1.0.1               RcppAnnoy_0.0.16            OrganismDbi_1.30.0          httr_1.4.2                  AnnotationDbi_1.50.3        ggbio_1.36.0                biovizBase_1.36.0           colorspace_2.0-0            XML_3.99-0.3                tensor_1.5                  reticulate_1.18             splines_4.0.3               uwot_0.1.8.9001             RBGL_1.64.0                 RcppRoll_0.3.0              spatstat.utils_2.1-0        plotly_4.9.2.1              xtable_1.8-4                jsonlite_1.7.1              poweRlaw_0.70.6             spatstat_1.64-1             R6_2.5.0                    Hmisc_4.4-1                 pillar_1.4.6               
## [169] htmltools_0.5.1.1           mime_0.9                    glue_1.4.2                  fastmap_1.0.1               DT_0.16                     BiocParallel_1.22.0         codetools_0.2-17            lattice_0.20-41             tibble_3.0.4                curl_4.3                    leiden_0.3.5                gtools_3.8.2                zip_2.1.1                   openxlsx_4.2.3              GO.db_3.11.4                openssl_1.4.3               survival_3.2-7              rmarkdown_2.5               munsell_0.5.0               GenomeInfoDbData_1.2.3      haven_2.3.1                 reshape2_1.4.4              gtable_0.3.0